*****************************************************************************************************
* Purpose: Clean HAALSI mortality data (interview and death dates) for analysis
* Written by:	Hunter Green
* Last updated: 2024-12-28
* Stata version: 18.0
*****************************************************************************************************

* User toggle: set to "T" when David runs this file, "F" otherwise.
* Read further down when the folder globals are chosen.
global David "F"


*****************************************************************************************************
* Options, global macros
*****************************************************************************************************
* Session options
* Pin interpreter behavior to Stata 18.0, then start from an empty session
version 18.0
clear all
* Require full variable names (no abbreviations)
set varabbrev off
* Do not stop at --more-- prompts
set more off
* Let any pause commands actually pause
pause on

* Global folder macros
*   paper_data : folder holding the raw data for the paper (output is also saved here)
*   haalsi     : HAALSI subfolder inside paper_data
* NOTE(review): both branches currently resolve to the same paths, so the
* David toggle has no effect yet -- confirm whether the non-David branch
* should point somewhere else.
if "${David}" != "T" {
	* Everyone except David
	global paper_data "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HPACC/Aging projects/HRS diabetes mortality/Data/Raw"
	global haalsi "${paper_data}/HAALSI"
}
else {
	* David: add your folder paths here
	global paper_data "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HPACC/Aging projects/HRS diabetes mortality/Data/Raw"
	global haalsi "${paper_data}/HAALSI"
}


*****************************************************************************************************
* Clean data
*****************************************************************************************************
* Open HAALSI: load only the ID, wave status, interview date and death date variables
* NOTE(review): the data file sits inside a folder that has the same name as the
* file itself (including the .dta suffix) -- confirm this is the real layout.
local haalsi_file "HAALSI W3 Longitudinal Data MAY_18_2023.dta"
local haalsi_vars prim_key w2status w3status w1c_int_month w1c_int_year w2c_int_year w2c_int_month ///
    w3c_int_year w3c_int_month w3begintime w3endtime w3_Date_of_Death
use `haalsi_vars' using "${haalsi}/`haalsi_file'/`haalsi_file'", clear

*** Wave 1
* Every record in the file is flagged as a Wave 1 respondent
* (n=5,059 per the original author's note)
generate inw1 = 1

* Copy the Wave 1 interview year and month, then discard the raw variables
generate w1yr = w1c_int_year
generate w1mo = w1c_int_month
drop w1c_int_year w1c_int_month

* Wave 1 interview date as a Stata monthly date
generate w1_ym = ym(w1yr, w1mo)
format %tm w1_ym


*** Wave 2
* Wave 2 interview status: 1 when w2status equals 1, otherwise 0
* (a missing w2status also yields 0)
generate inw2 = (w2status == 1)
drop w2status

* Copy the Wave 2 interview year and month, then discard the raw variables
generate w2yr = w2c_int_year
generate w2mo = w2c_int_month
drop w2c_int_year w2c_int_month

* Wave 2 interview date as a Stata monthly date
generate w2_ym = ym(w2yr, w2mo)
format %tm w2_ym


*** Wave 3
* Wave 3 interview status: 1 when w3status equals 1, otherwise 0
* (a missing w3status also yields 0)
generate inw3 = (w3status == 1)
drop w3status

* Copy the Wave 3 interview year and month
generate w3yr = w3c_int_year
generate w3mo = w3c_int_month

* Per the original author's note, one Wave 3 respondent has both year and
* month missing. Show that record together with the interview timestamps.
list prim_key w3begintime w3endtime w3yr w3mo if inw3 == 1 & missing(w3c_int_year)

* That interview began on 2022-02-19, so assign 2022-02.
* Flag the affected rows once so both replacements hit the same records.
generate byte w3_fix = (inw3 == 1 & missing(w3yr))
replace w3yr = 2022 if w3_fix == 1
replace w3mo = 2 if w3_fix == 1
drop w3_fix

* Wave 3 interview date as a Stata monthly date; raw inputs no longer needed
generate w3_ym = ym(w3yr, w3mo)
format %tm w3_ym
drop w3c_int_year w3c_int_month w3begintime w3endtime


*** Drop variables
* Helper year/month copies are no longer needed once the monthly dates exist.
* w3yr is kept because the censoring rules further down still read it.
drop w1yr w1mo
drop w2yr w2mo
drop w3mo


*** Year and month of biomarker collection (same as Wave 1 interview)
generate biomarker_ym = w1_ym
format %tm biomarker_ym


*** Death
* End of study follow-up is December 2019. It is defined once here and reused
* for both the death and the censor rules, so the cutoff cannot drift between them.
local end_study_yr 2019
local end_study_ym = ym(`end_study_yr', 12)

* "True" death status and time: any recorded date of death
* true_deceased is coded 1 / missing (not 1 / 0)
gen true_deceased = 1 if !mi(w3_Date_of_Death)

* NOTE(review): year() and month() assume w3_Date_of_Death is a Stata daily date -- confirm
gen deathyr = year(w3_Date_of_Death)
gen deathmo = month(w3_Date_of_Death)
gen true_death_ym = ym(deathyr, deathmo)
format true_death_ym %tm

* "Study" death status and time: start from the true values
gen study_deceased = true_deceased
gen study_death_ym = true_death_ym
format study_death_ym %tm

* In study if died on or before the end-of-study year, otherwise censor.
* The comparison is open-ended, so a death recorded in any later year is censored.
* Missing sorts above every number in Stata, hence the explicit !mi() guard.
replace study_deceased = . if deathyr > `end_study_yr' & !mi(deathyr)
replace study_death_ym = . if deathyr > `end_study_yr' & !mi(deathyr)


*** Censor
* Year and month of censor. Rules are applied in order and each later rule
* only fills rows that are still empty and have no in-study death.
*   1. Died after the end of study                   -> end of study
*   2. Wave 3 interview after the end-of-study year  -> end of study
*   3. Wave 3 interview in or before that year       -> Wave 3 interview month
*   4. No Wave 3 interview date, but a Wave 2 date   -> Wave 2 interview month
* Rules 2 and 3 together cover every non-missing Wave 3 interview year.
* NOTE(review): respondents with no in-study death and neither a Wave 2 nor a
* Wave 3 interview date keep a missing censor_ym -- confirm this is intended.
gen censor_ym = `end_study_ym' if deathyr > `end_study_yr' & !mi(deathyr)
replace censor_ym = `end_study_ym' if w3yr > `end_study_yr' & !mi(w3yr) & mi(study_death_ym) & mi(censor_ym)
replace censor_ym = w3_ym if !mi(w3_ym) & w3yr <= `end_study_yr' & mi(study_death_ym) & mi(censor_ym)
replace censor_ym = w2_ym if !mi(w2_ym) & mi(w3_ym) & mi(study_death_ym) & mi(censor_ym)
format censor_ym %tm
drop w3_Date_of_Death w3yr deathyr deathmo


*** Order variables
* ID first, then wave indicators, interview/biomarker dates, death variables, censor date
local wave_flags inw1 inw2 inw3
local date_vars w1_ym w2_ym w3_ym biomarker_ym
local death_vars true_deceased true_death_ym study_deceased study_death_ym
order prim_key `wave_flags' `date_vars' `death_vars' censor_ym


*** Label variables
* Wave indicators and interview dates share one label pattern per wave
forvalues wave = 1/3 {
	label variable inw`wave' "HAALSI in Wave `wave' sample"
	label variable w`wave'_ym "HAALSI Wave `wave' interview year & month"
}

* Biomarker, death and censor variables
label variable biomarker_ym "HAALSI year & month of biomarker collection"
label variable true_deceased "HAALSI R was deceased by November 2022 (True)"
label variable true_death_ym "HAALSI year & month of mortality (True)"
label variable study_deceased "HAALSI R was deceased by December 2019 (Study)"
label variable study_death_ym "HAALSI year & month of mortality to December 2019 (Study)"
label variable censor_ym "HAALSI year & month of censor"


*** Save data
* Write the cleaned dates file into the paper data folder, overwriting any earlier copy
save "${paper_data}/haalsi_dates.dta", replace

